<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta charset="utf-8">
<title>Divide and Conquer | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="stopwords.html" title="Stopwords: Performance Versus Precision">
<link rel="prev" href="stopwords-performance.html" title="Stopwords and Performance">
<link rel="next" href="stopwords-phrases.html" title="Stopwords and Phrase Queries">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <!-- Create the data layer queue before the container loader runs, and
         provide a no-script fallback frame for the same container ID. -->
    <script>dataLayer = [];</script>
    <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-58RLH5" title="Google Tag Manager" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <script>
      // Container loader: pushes a 'gtm.js' start event onto the data layer,
      // then inserts an async script element for the container in front of the
      // first script element in the document.
      (function (w, d, s, l, i) {
        w[l] = w[l] || [];
        w[l].push({'gtm.start': new Date().getTime(), event: 'gtm.js'});
        var f = d.getElementsByTagName(s)[0],
            j = d.createElement(s),
            // Only append the "&l=" parameter when a non-default layer name is used.
            dl = l != 'dataLayer' ? '&l=' + l : '';
        j.async = true;
        // Explicit https: (not protocol-relative) so the loader resolves
        // regardless of the scheme the page itself was opened with.
        j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
        f.parentNode.insertBefore(j, f);
      })(window, document, 'script', 'dataLayer', 'GTM-58RLH5');
    </script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      // Reuse the data layer queue if one is already defined on the page;
      // otherwise start with an empty one.
      if (!window.dataLayer) {
        window.dataLayer = [];
      }

      // Queue a command by pushing the raw arguments object onto the data layer.
      function gtag() {
        dataLayer.push(arguments);
      }

      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      // Site-intercept loader. The constructor g(e, h, f, g) receives:
      //   e - a number compared against 100 and used as e/100 (appears to be a
      //       sampling percentage; confirm against the vendor documentation),
      //   h - a mode flag; the code distinguishes "v" and "r",
      //   f - the name of the cookie that persists the sampling state,
      //   g - the URL assigned to the injected script element.
      (function(){var g=function(e,h,f,g){
      // get(a): return the value of cookie "a" from document.cookie, or null when it is not set.
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      // set(a, c): write cookie a=c with path=/ and an expiry 6048E5 ms (7 days) from now.
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      // check(): decide whether the script should be injected on this page view.
      // State lives in cookie f as "mode:percent:counter". With no cookie and
      // e == 100 it returns true without writing a cookie; with no cookie and
      // e != 100 a fresh state is stored (in "v" mode e is first resolved to
      // 0 or 100 via Math.random()). A stored percent of 100 always returns
      // true. Otherwise "v" mode returns false, and "r" mode increments the
      // counter, saves it, and returns true when the previous counter value is
      // a multiple of floor(100 / percent). Any other mode returns true.
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      // go(): when check() passes, create a script element with src g and append it to document.body (if body exists).
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      // start(): run go() on the window "load" event, using attachEvent when addEventListener is unavailable.
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      // Instantiate with e=100 and mode "r" and start it; any exception thrown here is swallowed by the empty catch.
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-link"><a href="languages.html">Dealing with Human Language</a></span>
»
<span class="breadcrumb-link"><a href="stopwords.html">Stopwords: Performance Versus Precision</a></span>
»
<span class="breadcrumb-node">Divide and Conquer</span>
</div>
<div class="navheader">
<span class="prev">
<a href="stopwords-performance.html">« Stopwords and Performance</a>
</span>
<span class="next">
<a href="stopwords-phrases.html">Stopwords and Phrase Queries »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="common-terms"></a>Divide and Conquer<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/240_Stopwords/40_Divide_and_conquer.asciidoc">edit</a>
</h2>
</div></div></div>
<p>The terms in a query string can be divided into more-important (low-frequency)
and less-important (high-frequency) terms. Documents that match only the less
important terms are probably of very little interest.  Really, we want
documents that match as many of the more important terms as possible.</p>
<p>The <code class="literal">match</code> query accepts a <code class="literal">cutoff_frequency</code> parameter, which allows it to
divide the terms in the query string into a low-frequency and high-frequency
group. The low-frequency group (more-important terms) forms the bulk of the
query, while the high-frequency group (less-important terms) is used only for
scoring, not for matching. By treating these two groups differently, we can
gain a real boost of speed on previously slow queries.</p>
<div class="sidebar">
<div class="titlepage"><div><div>
<p class="title"><strong>Domain-Specific Stopwords</strong></p>
</div></div></div>
<p>One of the benefits of <code class="literal">cutoff_frequency</code> is that you get <em>domain-specific</em>
stopwords for free. For instance, a website about movies may use the words
<em>movie</em>, <em>color</em>, <em>black</em>, and <em>white</em> so often that they could be
considered almost meaningless.  With the <code class="literal">stop</code> token filter, these domain-specific terms would have to be added to the stopwords list manually. However,
because the <code class="literal">cutoff_frequency</code> looks at the actual frequency of terms in the
index,  these words would be classified as <em>high frequency</em> automatically.</p>
</div>
<p>Take this query as an example:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{
  "match": {
    "text": {
      "query": "Quick and the dead",
      "cutoff_frequency": 0.01 <a id="CO172-1"></a><i class="conum" data-value="1"></i>
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO172-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>Any term that occurs in more than 1% of documents is considered to be high
frequency. The <code class="literal">cutoff_frequency</code> can be specified as a fraction (<code class="literal">0.01</code>)
or as an absolute number (<code class="literal">5</code>).</p>
</td>
</tr>
</table>
</div>
<p>This query uses the <code class="literal">cutoff_frequency</code> to first divide the query terms into a
low-frequency group (<code class="literal">quick</code>, <code class="literal">dead</code>) and a high-frequency group (<code class="literal">and</code>,
<code class="literal">the</code>). Then, the query is rewritten to produce the following <code class="literal">bool</code> query:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{
  "bool": {
    "must": { <a id="CO173-1"></a><i class="conum" data-value="1"></i>
      "bool": {
        "should": [
          { "term": { "text": "quick" }},
          { "term": { "text": "dead"  }}
        ]
      }
    },
    "should": { <a id="CO173-2"></a><i class="conum" data-value="2"></i>
      "bool": {
        "should": [
          { "term": { "text": "and" }},
          { "term": { "text": "the" }}
        ]
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO173-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>At least one low-frequency/high-importance term <em>must</em> match.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO173-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>High-frequency/low-importance terms are entirely optional.</p>
</td>
</tr>
</table>
</div>
<p>The <code class="literal">must</code> clause means that at least one of the low-frequency terms—<code class="literal">quick</code> or <code class="literal">dead</code>—<em>must</em> be present for a document to be considered a
match. All other documents are excluded.  The <code class="literal">should</code> clause then looks for
the high-frequency terms <code class="literal">and</code> and <code class="literal">the</code>,  but only in the documents collected
by the <code class="literal">must</code> clause. The sole job of the <code class="literal">should</code> clause is to score a
document like “Quick <em>and the</em> dead” higher than “<em>The</em> quick but
dead”.  This approach greatly reduces the number of documents that need to be
examined and scored.</p>
<div class="tip admon">
<div class="icon"></div>
<div class="admon_content">
<p>Setting the operator parameter to <code class="literal">and</code> would make <em>all</em> low-frequency terms
required, and score documents that contain <em>all</em> high-frequency terms higher.
However, matching documents would not be required to contain all high-frequency terms.  If you would prefer all low- and high-frequency terms to be
required, you should use a <code class="literal">bool</code> query instead.   As we saw in
<a class="xref" href="stopwords-performance.html#stopwords-and" title="and Operator">and Operator</a>, this is already an efficient query.</p>
</div>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_controlling_precision_2"></a>Controlling Precision<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/240_Stopwords/40_Divide_and_conquer.asciidoc">edit</a>
</h3>
</div></div></div>
<p>The <code class="literal">minimum_should_match</code> parameter can be combined with <code class="literal">cutoff_frequency</code>
but it applies to only the low-frequency terms.  This query:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{
  "match": {
    "text": {
      "query": "Quick and the dead",
      "cutoff_frequency": 0.01,
      "minimum_should_match": "75%"
    }
  }
}</pre>
</div>
<p>would be rewritten as follows:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{
  "bool": {
    "must": {
      "bool": {
        "should": [
          { "term": { "text": "quick" }},
          { "term": { "text": "dead"  }}
        ],
        "minimum_should_match": 1 <a id="CO174-1"></a><i class="conum" data-value="1"></i>
      }
    },
    "should": { <a id="CO174-2"></a><i class="conum" data-value="2"></i>
      "bool": {
        "should": [
          { "term": { "text": "and" }},
          { "term": { "text": "the" }}
        ]
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO174-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>Because there are only two terms, the original 75% is rounded down
to <code class="literal">1</code>, that is: <em>one out of two low-frequency terms must match</em>.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO174-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>The high-frequency terms are still optional and used only for scoring.</p>
</td>
</tr>
</table>
</div>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_only_high_frequency_terms"></a>Only High-Frequency Terms<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/240_Stopwords/40_Divide_and_conquer.asciidoc">edit</a>
</h3>
</div></div></div>
<p>An <code class="literal">or</code> query for high-frequency terms only—“To be, or not to be”—is
the worst case for performance. It is pointless to score <em>all</em> the
documents that contain only one of these terms in order to return just the top
10 matches. We are really interested only in documents in which the terms all occur
together, so in the case where there are no low-frequency terms, the query is
rewritten to make all high-frequency terms required:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{
  "bool": {
    "must": [
      { "term": { "text": "to" }},
      { "term": { "text": "be" }},
      { "term": { "text": "or" }},
      { "term": { "text": "not" }},
      { "term": { "text": "to" }},
      { "term": { "text": "be" }}
    ]
  }
}</pre>
</div>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_more_control_with_common_terms"></a>More Control with Common Terms<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/240_Stopwords/40_Divide_and_conquer.asciidoc">edit</a>
</h3>
</div></div></div>
<p>While the high/low frequency functionality in the <code class="literal">match</code> query is useful,
sometimes you want more control over how the high- and low-frequency groups
should be handled.  The <code class="literal">match</code> query exposes a subset of the
functionality available in the <code class="literal">common</code> terms query.</p>
<p>For instance, we could make all low-frequency terms required, and score only
documents that have 75% of all high-frequency terms with a query like this:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{
  "common": {
    "text": {
      "query":                  "Quick and the dead",
      "cutoff_frequency":       0.01,
      "low_freq_operator":      "and",
      "minimum_should_match": {
        "high_freq":            "75%"
      }
    }
  }
}</pre>
</div>
<p>See the <a href="/guide/en/elasticsearch/reference/2.4/query-dsl-common-terms-query.html" class="ulink" target="_top"><code class="literal">common</code> terms query</a> reference page for more options.</p>
</div>

</div>
<div class="navfooter">
<span class="prev">
<a href="stopwords-performance.html">« Stopwords and Performance</a>
</span>
<span class="next">
<a href="stopwords-phrases.html">Stopwords and Phrase Queries »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  // Start the page with an empty global state object.
  window.initial_state = {};
</script>
  </body>
</html>
